##############################################################################
# Supplementary Code: Single-cell RNA-seq analysis of growth-plate chondrocytes
# ---------------------------------------------------------------------------
# This script performs data import, quality control, batch-effect correction,
# clustering, sub-clustering and visualization of mouse cartilage samples.
# Software: R (≥ 4.2), Seurat (≥ 4.0), harmony, clustree, ggplot2
##############################################################################

# ---------- 1.  Global settings ----------
# All analyses were performed under R version 4.2.3 with the Seurat 4.3.0 pipeline.

# ---------- 2.  Data import ----------
list.files()
WT1 <- Read10X("Mouse1/")
WT2 <- Read10X("Mouse2/")
WT3 <- Read10X("Mouse3/")
WT4 <- Read10X("Mouse4/")
WT5 <- Read10X("Mouse5/")
Aga1 <- Read10X("AGA1/")
Aga2 <- Read10X("AGA2/")
Aga3 <- Read10X("AGA3/")
Aga4 <- Read10X("AGA4/")
Aga5 <- Read10X("AGA5/")

# ---------- 3.  Seurat object creation ----------
create_obj <- function(counts, sample_name) {
  CreateSeuratObject(counts = counts,
                     project = sample_name,
                     min.cells = 3,
                     min.features = 200)
}
WT1 <- create_obj(WT1, "WT1"); WT2 <- create_obj(WT2, "WT2")
WT3 <- create_obj(WT3, "WT3"); WT4 <- create_obj(WT4, "WT4")
WT5 <- create_obj(WT5, "WT5")
Aga1 <- create_obj(Aga1, "Aga1"); Aga2 <- create_obj(Aga2, "Aga2")
Aga3 <- create_obj(Aga3, "Aga3"); Aga4 <- create_obj(Aga4, "Aga4")
Aga5 <- create_obj(Aga5, "Aga5")

# ---------- 4.  Record raw cell numbers ----------
raw_cells <- c(ncol(WT1), ncol(WT2), ncol(WT3), ncol(WT4), ncol(WT5),
               ncol(Aga1), ncol(Aga2), ncol(Aga3), ncol(Aga4), ncol(Aga5))
sample_names <- c("WT1", "WT2", "WT3", "WT4", "WT5",
                  "Aga1", "Aga2", "Aga3", "Aga4", "Aga5")
qc_table_raw <- data.frame(Sample = sample_names, Raw_Cells = raw_cells)

# ---------- 5.  Add group information ----------
for (i in list(WT1, WT2, WT3, WT4, WT5)) i$group <- "WT"
for (i in list(Aga1, Aga2, Aga3, Aga4, Aga5)) i$group <- "Aga2"

# ---------- 6.  Quality control ----------
qc <- function(obj) {
  obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^mt-")
  obj <- subset(obj,
                subset = nFeature_RNA > 250 &
                         nCount_RNA > 500 &
                         percent.mt < 20) %>%
         NormalizeData()
  return(obj)
}
WT1 <- qc(WT1); WT2 <- qc(WT2); WT3 <- qc(WT3); WT4 <- qc(WT4); WT5 <- qc(WT5)
Aga1 <- qc(Aga1); Aga2 <- qc(Aga2); Aga3 <- qc(Aga3); Aga4 <- qc(Aga4); Aga5 <- qc(Aga5)

# ---------- 7.  Record post-QC cell numbers ----------
qc_cells <- c(ncol(WT1), ncol(WT2), ncol(WT3), ncol(WT4), ncol(WT5),
              ncol(Aga1), ncol(Aga2), ncol(Aga3), ncol(Aga4), ncol(Aga5))
qc_table <- data.frame(Sample = sample_names,
                       Raw_Cells = raw_cells,
                       Cells_after_QC = qc_cells,
                       Cells_Removed = raw_cells - qc_cells,
                       Removal_Rate = round((raw_cells - qc_cells) / raw_cells * 100, 2))
write.csv(qc_table, "Cell_Counts.csv", row.names = FALSE)

# ---------- 8.  Merge objects ----------
merge <- merge(x = WT1, y = c(WT2, WT3, WT4, WT5, Aga1, Aga2, Aga3, Aga4, Aga5))
merge <- JoinLayers(merge)

# ---------- 9.  Harmony batch correction ----------
merge <- merge %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = rownames(merge)) %>%
  RunPCA(npcs = 50, verbose = FALSE) %>%
  RunHarmony(group.by.vars = "orig.ident")

# ---------- 10.  Elbow-plot based PC selection ----------
pct <- merge[["pca"]]@stdev / sum(merge[["pca"]]@stdev) * 100
cumu <- cumsum(pct)
pc.use <- min(which(cumu > 90 & pct < 5)[1],
              sort(which((pct[1:length(pct) - 1] - pct[2:length(pct)]) > 0.1), decreasing = TRUE)[1] + 1)
ElbowPlot(merge, ndims = 50, reduction = "pca") +
  geom_vline(xintercept = pc.use, color = "darkred")

# ---------- 11.  Clustering (resolution sweep) ----------
merge <- FindClusters(merge, resolution = seq(0.1, 1.6, 0.2))
clustree(merge@meta.data, prefix = "RNA_snn_res.")

# ---------- 12.  Final clustering ----------
merge <- FindClusters(merge, resolution = 0.7)
merge <- RunUMAP(merge, reduction = "harmony", dims = 1:pc.use) %>%
         RunTSNE(reduction = "harmony", dims = 1:pc.use)

# ---------- 13.  Save merged object ----------
save(merge, file = "merge.Rdata")

# ---------- 14.  Cell-type annotation ----------
load("merge.Rdata")
Idents(merge) <- "seurat_clusters"
merge <- RenameIdents(merge,
                      "0" = "Chondrocytes", "1" = "Chondrocytes", "2" = "Chondrocytes",
                      "3" = "Chondrocytes", "4" = "Chondrocytes", "5" = "Chondrocytes",
                      "6" = "Chondrocytes", "7" = "Chondrocytes", "8" = "Chondrocytes",
                      "9" = "Chondrocytes", "10" = "Osteoblasts", "11" = "Chondrocytes",
                      "12" = "Chondrocytes", "13" = "Osteoblasts", "14" = "Osteoblasts",
                      "15" = "Osteoblasts", "16" = "Chondrocytes", "17" = "Osteoblasts",
                      "18" = "Endothelial cells", "19" = "Macrophages", "20" = "Chondrocytes")
merge$celltype <- merge@active.ident
save(merge, file = "merge.relabel2.Rdata")

# ---------- 15.  Visualization ----------
load("merge.relabel2.Rdata")
sample_color <- c("#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3")  # Chondro, Osteo, Endo, Macro

pdf("umap_annotated.pdf", width = 10, height = 8)
DimPlot(merge, reduction = "umap", label = TRUE, label.size = 4.5) +
  scale_color_manual(values = sample_color)
dev.off()

pdf("umap_by_group.pdf", width = 10, height = 8)
DimPlot(merge, reduction = "umap", label = FALSE, group.by = "group") +
  scale_color_manual(values = c("WT" = "#8dd1c6", "Aga2" = "#de554c"))
dev.off()

pdf("umap_split_sample.pdf", width = 15, height = 4)
DimPlot(merge, reduction = "umap", label = FALSE, split.by = "orig.ident") +
  scale_color_manual(values = sample_color)
dev.off()

# ---------- 16.  Dot plot ----------
pdf("marker_dotplot.pdf", width = 15, height = 10)
DotPlot(merge,
        features = c("Cd14", "Cd68", "Col1a1", "Col3a1",
                     "Col2a1", "Acan", "Sox9",
                     "Cdh5", "Pecam1")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), text = element_text(size = 17)) +
  scale_color_gradientn(colours = c("#77aed3", "#ffffff", "#b72128")) +
  scale_size_area(max_size = 12)
dev.off()

# ---------- 17.  Feature plot ----------
pdf("marker_featureplot.pdf", width = 12, height = 10)
FeaturePlot(merge, features = c("Cd14", "Col1a1", "Col2a1", "Pecam1"),
            cols = c("lightgrey", "#de554c"))
dev.off()

# ---------- 18.  Alluvial plot ----------
library(reshape2)
pB2_df <- table(merge$celltype, merge$group) %>% melt()
colnames(pB2_df) <- c("Cluster", "Group", "Number")
pB2_df$Group <- factor(pB2_df$Group, levels = c("WT", "Aga2"))
pB2_df <- subset(pB2_df, Cluster %in% c("Endothelial cells", "Macrophages",
                                        "Chondrocytes", "Osteoblasts"))
pB2_df$Cluster <- factor(pB2_df$Cluster,
                         levels = c("Endothelial cells", "Macrophages",
                                    "Chondrocytes", "Osteoblasts"))
pB2_df <- pB2_df %>%
  group_by(Group) %>%
  mutate(percent = Number / sum(Number))

library(ggalluvial)
pdf("alluvial_group.pdf", width = 8, height = 10)
ggplot(pB2_df, aes(x = Group, y = percent, fill = Cluster,
                   stratum = Cluster, alluvium = Cluster)) +
  geom_col(width = 0.6, color = NA, size = 0.5) +
  geom_flow(width = 0.6, alpha = 0.22, knot.pos = 0.35,
            color = "white", size = 0.5) +
  geom_alluvium(width = 0.6, alpha = 1, knot.pos = 0.35,
                fill = NA, color = "white", size = 0.5) +
  scale_y_continuous(labels = scales::percent) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title = element_text(size = 20),
        axis.text = element_text(size = 20)) +
  scale_fill_manual(values = sample_color)
dev.off()

# ---------- 19.  Subset chondrocytes ----------
Chondrocytes <- subset(merge, celltype == "Chondrocytes")
save(Chondrocytes, file = "Chondrocytes.Rdata")

# ---------- 20.  Second-round clustering of chondrocytes ----------
load("Chondrocytes.Rdata")
Chondrocytes <- Chondrocytes %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(features = rownames(Chondrocytes)) %>%
  RunPCA(npcs = 50, verbose = FALSE) %>%
  RunHarmony(group.by.vars = "orig.ident")

pct <- Chondrocytes[["pca"]]@stdev / sum(Chondrocytes[["pca"]]@stdev) * 100
cumu <- cumsum(pct)
pc.use <- min(which(cumu > 90 & pct < 5)[1],
              sort(which((pct[1:length(pct) - 1] - pct[2:length(pct)]) > 0.1), decreasing = TRUE)[1] + 1)

Chondrocytes <- FindClusters(Chondrocytes, resolution = seq(0.1, 1.6, 0.2))
clustree(Chondrocytes@meta.data, prefix = "RNA_snn_res.")
Chondrocytes <- FindClusters(Chondrocytes, resolution = 0.1)

Chondrocytes <- RunUMAP(Chondrocytes, reduction = "harmony", dims = 1:pc.use) %>%
                RunTSNE(reduction = "harmony", dims = 1:pc.use)

save(Chondrocytes, file = "Chondrocytes.Rdata")

# ---------- 21.  Chondrocyte sub-annotation ----------
Idents(Chondrocytes) <- "seurat_clusters"
Chondrocytes <- RenameIdents(Chondrocytes,
                             "0" = "Resting",
                             "1" = "Pre-hypertrophic",
                             "2" = "Proliferating",
                             "3" = "Hypertrophic")
Chondrocytes$stim <- Chondrocytes@active.ident
save(Chondrocytes, file = "Chondrocytes.Rdata")

# ---------- 22.  Visualize chondrocyte subtypes ----------
chondro_cols <- c("#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3")

pdf("chondro_umap_subtype.pdf", width = 10, height = 8)
DimPlot(Chondrocytes, reduction = "umap", label = TRUE, label.size = 4.5) +
  scale_color_manual(values = chondro_cols)
dev.off()

pdf("chondro_group.pdf", width = 10, height = 8)
DimPlot(Chondrocytes, reduction = "umap", label = FALSE, group.by = "group") +
  scale_color_manual(values = c("WT" = "#8dd1c6", "Aga2" = "#de554c"))
dev.off()

pdf("chondro_split_sample.pdf", width = 15, height = 4)
DimPlot(Chondrocytes, reduction = "umap", label = FALSE, split.by = "orig.ident") +
  scale_color_manual(values = chondro_cols)
dev.off()

# ---------- 23.  Dot & feature plots for chondrocyte markers ----------
features <- c("Sox9", "Pcna", "Pth1r", "Vegfa")
pdf("chondro_dotplot.pdf", width = 10, height = 4)
DotPlot(Chondrocytes, features = features) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), text = element_text(size = 17)) +
  scale_color_gradientn(colours = c("#77aed3", "#ffffff", "#b72128")) +
  scale_size_area(max_size = 12)
dev.off()

pdf("chondro_featureplot.pdf", width = 12, height = 10)
FeaturePlot(Chondrocytes, features = features, cols = c("lightgrey", "#de554c"))
dev.off()

# ---------- 24.  Alluvial plot (chondrocyte subtypes vs group) ----------
pB2_df <- table(Chondrocytes$stim, Chondrocytes$group) %>% melt()
colnames(pB2_df) <- c("Cluster", "Group", "Number")
pB2_df$Group <- factor(pB2_df$Group, levels = c("WT", "Aga2"))
pB2_df$Cluster <- factor(pB2_df$Cluster,
                         levels = c("Resting", "Proliferating",
                                    "Pre-hypertrophic", "Hypertrophic"))
pB2_df <- pB2_df %>% group_by(Group) %>% mutate(percent = Number / sum(Number))

pdf("chondro_alluvial.pdf", width = 8, height = 10)
ggplot(pB2_df, aes(x = Group, y = percent, fill = Cluster,
                   stratum = Cluster, alluvium = Cluster)) +
  geom_col(width = 0.6, color = NA, size = 0.5) +
  geom_flow(width = 0.6, alpha = 0.22, knot.pos = 0.35,
            color = "white", size = 0.5) +
  geom_alluvium(width = 0.6, alpha = 1, knot.pos = 0.35,
                fill = NA, color = "white", size = 0.5) +
  scale_y_continuous(labels = scales::percent) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title = element_text(size = 20),
        axis.text = element_text(size = 20)) +
  scale_fill_manual(values = chondro_cols)
dev.off()

# ---------- 25.  Session info ----------
sessionInfo()

